Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r--	runsc/boot/loader.go	125
1 file changed, 83 insertions(+), 42 deletions(-)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 65ac67dbf..19b738705 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,12 +27,12 @@ import (
 	gtime "time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/cpuid"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/memutil"
 	"gvisor.dev/gvisor/pkg/rand"
-	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -181,9 +181,6 @@ type Args struct {
 // New initializes a new kernel loader configured by spec.
 // New also handles setting up a kernel for restoring a container.
 func New(args Args) (*Loader, error) {
-	// Sets the reference leak check mode.
-	refs.SetLeakMode(args.Conf.ReferenceLeakMode)
-
 	// We initialize the rand package now to make sure /dev/urandom is pre-opened
 	// on kernels that do not support getrandom(2).
 	if err := rand.Init(); err != nil {
@@ -527,15 +524,14 @@ func (l *Loader) run() error {
 	// ours either way.
 	l.rootProcArgs.FDTable = fdTable
 
-	// cid for root container can be empty. Only subcontainers need it to set
-	// the mount location.
-	mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
-	if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
+	// Set up the root container's file system.
+	l.startGoferMonitor(l.sandboxID, l.goferFDs)
+
+	mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
+	if err := mntr.processHints(l.conf); err != nil {
 		return err
 	}
-
-	rootCtx := l.rootProcArgs.NewContext(l.k)
-	if err := setExecutablePath(rootCtx, &l.rootProcArgs); err != nil {
+	if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil {
 		return err
 	}
 
@@ -549,7 +545,7 @@ func (l *Loader) run() error {
 		}
 	}
 	if !hasHomeEnvv {
-		homeDir, err := getExecUserHome(rootCtx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
+		homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
 		if err != nil {
 			return fmt.Errorf("error reading exec user: %v", err)
 		}
@@ -631,7 +627,6 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 	// sentry currently supports only 1 mount namespace, which is tied to a
 	// single user namespace. Thus we must run in the same user namespace
 	// to access mounts.
-	// TODO(b/63601033): Create a new mount namespace for the container.
 	creds := auth.NewUserCredentials(
 		auth.KUID(spec.Process.User.UID),
 		auth.KGID(spec.Process.User.GID),
@@ -687,13 +682,12 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 		goferFDs = append(goferFDs, fd)
 	}
 
-	mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
-	if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
-		return fmt.Errorf("configuring container FS: %v", err)
-	}
+	// Set up the child container's file system.
+	l.startGoferMonitor(cid, goferFDs)
 
-	if err := setExecutablePath(ctx, &procArgs); err != nil {
-		return fmt.Errorf("setting executable path for %+v: %v", procArgs, err)
+	mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
+	if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil {
+		return err
 	}
 
 	// Create and start the new process.
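The rand.Init() call that survives the refactor pre-opens /dev/urandom so random bytes remain available on kernels that lack getrandom(2). A minimal standalone sketch of that fallback idea, assuming a hypothetical initRand helper rather than gVisor's actual pkg/rand implementation:

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// urandom holds the pre-opened fallback. Opening it early means random
// bytes stay reachable even after the process can no longer touch the
// host filesystem.
var urandom *os.File

// initRand is a hypothetical sketch of a rand.Init-style helper: probe
// getrandom(2) once, and fall back to /dev/urandom only when the syscall
// does not exist (ENOSYS, i.e. pre-3.17 kernels).
func initRand() error {
	buf := make([]byte, 1)
	if _, err := unix.Getrandom(buf, 0); err != unix.ENOSYS {
		// getrandom(2) is implemented; err is nil on success. A real
		// implementation would also retry EINTR here.
		return err
	}
	f, err := os.Open("/dev/urandom")
	if err != nil {
		return fmt.Errorf("opening /dev/urandom fallback: %v", err)
	}
	urandom = f
	return nil
}

func main() {
	if err := initRand(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("random source ready")
}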
@@ -710,17 +704,59 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 	return nil
 }
 
+// startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
+// the gofer FDs looking for disconnects, and destroys the container if a
+// disconnect occurs in any of the gofer FDs.
+func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+	go func() {
+		log.Debugf("Monitoring gofer health for container %q", cid)
+		var events []unix.PollFd
+		for _, fd := range goferFDs {
+			events = append(events, unix.PollFd{
+				Fd:     int32(fd),
+				Events: unix.POLLHUP | unix.POLLRDHUP,
+			})
+		}
+		_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
+			// Use ppoll instead of poll because it's already whitelisted in seccomp.
+			n, err := unix.Ppoll(events, nil, nil)
+			return uintptr(n), 0, err
+		})
+		if err != nil {
+			panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+		}
+
+		// Check if the gofer has stopped as part of normal container destruction.
+		// This is done just to avoid sending an annoying error message to the log.
+		// Note that there is a small race window in between mu.Unlock() and the
+		// lock being reacquired in destroyContainer(), but it's harmless to call
+		// destroyContainer() multiple times.
+		l.mu.Lock()
+		_, ok := l.processes[execID{cid: cid}]
+		l.mu.Unlock()
+		if ok {
+			log.Infof("Gofer socket disconnected, destroying container %q", cid)
+			if err := l.destroyContainer(cid); err != nil {
+				log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+			}
+		}
+	}()
+}
+
 // destroyContainer stops a container if it is still running and cleans up its
 // filesystem.
 func (l *Loader) destroyContainer(cid string) error {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	// Has the container started?
-	_, _, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	_, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	if err != nil {
+		// Container doesn't exist.
+		return err
+	}
 
-	// If the container has started, kill and wait for all processes.
-	if err == nil {
+	// The container exists; has it been started?
+	if started {
 		if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
 			return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
 		}
@@ -754,27 +790,22 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	tg, _, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+	tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
 	if err != nil {
-		return 0, fmt.Errorf("no such container: %q", args.ContainerID)
+		return 0, err
+	}
+	if !started {
+		return 0, fmt.Errorf("container %q not started", args.ContainerID)
 	}
 
-	// Get the container Root Dirent and MountNamespace from the Task.
+	// Get the container MountNamespace from the Task.
 	tg.Leader().WithMuLocked(func(t *kernel.Task) {
-		// FSContext.RootDirectory() will take an extra ref for us.
-		args.Root = t.FSContext().RootDirectory()
-
 		// task.MountNamespace() does not take a ref, so we must do so
 		// ourselves.
 		args.MountNamespace = t.MountNamespace()
 		args.MountNamespace.IncRef()
 	})
-	defer func() {
-		if args.Root != nil {
-			args.Root.DecRef()
-		}
-		args.MountNamespace.DecRef()
-	}()
+	defer args.MountNamespace.DecRef()
 
 	// Start the process.
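The monitor above parks a goroutine in ppoll(2) until a gofer FD reports a hang-up, then tears the container down. A standalone sketch of the same polling pattern, with a hypothetical waitForHangup helper and a plain EINTR retry loop standing in for gVisor's specutils.RetryEintr:

package main

import (
	"fmt"
	"log"

	"golang.org/x/sys/unix"
)

// waitForHangup is a hypothetical helper mirroring the loop in
// startGoferMonitor: it blocks until any of the given FDs reports
// POLLHUP or POLLRDHUP, i.e. the peer closed its end of the socket.
func waitForHangup(fds []int) error {
	pollFDs := make([]unix.PollFd, 0, len(fds))
	for _, fd := range fds {
		pollFDs = append(pollFDs, unix.PollFd{
			Fd:     int32(fd),
			Events: unix.POLLHUP | unix.POLLRDHUP,
		})
	}
	for {
		// A nil timeout blocks indefinitely. Ppoll returns EINTR when
		// interrupted by a signal, in which case we simply retry; any
		// other return means a hang-up event (or a real error) arrived.
		if _, err := unix.Ppoll(pollFDs, nil, nil); err != unix.EINTR {
			return err
		}
	}
}

func main() {
	// Watch one end of a socket pair and close the other end to
	// simulate a gofer disconnect.
	pair, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
	if err != nil {
		log.Fatal(err)
	}
	go unix.Close(pair[1]) // Peer goes away.
	if err := waitForHangup([]int{pair[0]}); err != nil {
		log.Fatal(err)
	}
	fmt.Println("peer hung up; would destroy container here")
}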
 	proc := control.Proc{Kernel: l.k}
@@ -895,6 +926,8 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
 			return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
 		}
 
+		s.FillDefaultIPTables()
+
 		return &s, nil
 
 	default:
@@ -1026,20 +1059,28 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
 func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
-	return l.threadGroupFromIDLocked(key)
+	tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+	if err != nil {
+		return nil, nil, err
+	}
+	if !ok {
+		return nil, nil, fmt.Errorf("container %q not started", key.cid)
+	}
+	return tg, tty, nil
 }
 
 // threadGroupFromIDLocked returns the thread group and TTY for the given
 // execution ID. TTY may be nil if the process is not attached to a terminal.
-// Returns error if execution ID is invalid or if container/process has not
-// started yet. Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// Also returns a boolean indicating whether the container has already started.
+// Returns error if execution ID is invalid or if the container cannot be
+// found (maybe it has been deleted). Caller must hold 'mu'.
+func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
 	ep := l.processes[key]
 	if ep == nil {
-		return nil, nil, fmt.Errorf("container not found")
+		return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
 	}
 	if ep.tg == nil {
-		return nil, nil, fmt.Errorf("container not started")
+		return nil, nil, false, nil
 	}
-	return ep.tg, ep.tty, nil
+	return ep.tg, ep.tty, true, nil
 }
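The widened threadGroupFromIDLocked signature encodes three outcomes: an error when the ID is unknown (perhaps the container was already deleted), started == false when the entry exists but has no thread group yet, and a usable thread group only when started is true. A simplified sketch of that contract, using hypothetical stand-in types instead of gVisor's kernel.ThreadGroup:

package main

import "fmt"

// execProcess is a hypothetical stand-in for the loader's per-container
// entry; tg stays nil until the container's first task is created.
type execProcess struct {
	tg *string // Stand-in for *kernel.ThreadGroup.
}

var processes = map[string]*execProcess{}

// lookup mirrors the new contract: error means the ID is unknown,
// started == false means registered but not yet running, and only the
// started == true case yields a usable handle.
func lookup(cid string) (tg *string, started bool, err error) {
	ep, ok := processes[cid]
	if !ok {
		return nil, false, fmt.Errorf("container %q not found", cid)
	}
	if ep.tg == nil {
		return nil, false, nil
	}
	return ep.tg, true, nil
}

func main() {
	root := "root-thread-group"
	processes["created"] = &execProcess{}          // Registered, not started.
	processes["running"] = &execProcess{tg: &root} // Started.

	for _, cid := range []string{"missing", "created", "running"} {
		tg, started, err := lookup(cid)
		switch {
		case err != nil:
			fmt.Printf("%s: %v\n", cid, err)
		case !started:
			fmt.Printf("%s: exists but not started\n", cid)
		default:
			fmt.Printf("%s: %s\n", cid, *tg)
		}
	}
}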