path: root/runsc/boot/loader.go
Diffstat (limited to 'runsc/boot/loader.go')
-rw-r--r--  runsc/boot/loader.go | 125
1 file changed, 83 insertions(+), 42 deletions(-)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 65ac67dbf..19b738705 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,12 +27,12 @@ import (
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
"gvisor.dev/gvisor/pkg/rand"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -181,9 +181,6 @@ type Args struct {
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
- // Sets the reference leak check mode
- refs.SetLeakMode(args.Conf.ReferenceLeakMode)
-
// We initialize the rand package now to make sure /dev/urandom is pre-opened
// on kernels that do not support getrandom(2).
if err := rand.Init(); err != nil {
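The comment above explains why rand.Init() runs this early: getrandom(2) is missing on older kernels, so /dev/urandom must be pre-opened while open(2) is still possible (presumably before seccomp filters tighten). A minimal sketch of that fallback pattern, as a hypothetical helper rather than the actual pkg/rand code:

    package main

    import (
        "os"

        "golang.org/x/sys/unix"
    )

    // urandom stands in for the FD that an init step would pre-open,
    // while opening files is still allowed.
    var urandom *os.File

    // readRandom prefers getrandom(2) and falls back to the pre-opened
    // /dev/urandom on kernels that do not implement the syscall.
    func readRandom(buf []byte) (int, error) {
        n, err := unix.Getrandom(buf, 0)
        if err == unix.ENOSYS {
            return urandom.Read(buf)
        }
        return n, err
    }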
@@ -527,15 +524,14 @@ func (l *Loader) run() error {
// ours either way.
l.rootProcArgs.FDTable = fdTable
- // cid for root container can be empty. Only subcontainers need it to set
- // the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
- if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
+ // Set up the root container file system.
+ l.startGoferMonitor(l.sandboxID, l.goferFDs)
+
+ mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
+ if err := mntr.processHints(l.conf); err != nil {
return err
}
-
- rootCtx := l.rootProcArgs.NewContext(l.k)
- if err := setExecutablePath(rootCtx, &l.rootProcArgs); err != nil {
+ if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil {
return err
}
@@ -549,7 +545,7 @@ func (l *Loader) run() error {
}
}
if !hasHomeEnvv {
- homeDir, err := getExecUserHome(rootCtx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
+ homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
if err != nil {
return fmt.Errorf("error reading exec user: %v", err)
}
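hasHomeEnvv above comes from a loop over the environment that this hunk elides; a simplified sketch of the assumed shape of that check (uses the standard "strings" package, names taken from the surrounding code):

    // Assumed shape of the elided check: detect whether HOME is already
    // present before resolving the exec user's home directory.
    hasHomeEnvv := false
    for _, env := range l.rootProcArgs.Envv {
        if strings.HasPrefix(env, "HOME=") {
            hasHomeEnvv = true
            break
        }
    }
    if !hasHomeEnvv {
        homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace,
            uint32(l.rootProcArgs.Credentials.RealKUID))
        if err != nil {
            return fmt.Errorf("error reading exec user: %v", err)
        }
        l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
    }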
@@ -631,7 +627,6 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
// sentry currently supports only 1 mount namespace, which is tied to a
// single user namespace. Thus we must run in the same user namespace
// to access mounts.
- // TODO(b/63601033): Create a new mount namespace for the container.
creds := auth.NewUserCredentials(
auth.KUID(spec.Process.User.UID),
auth.KGID(spec.Process.User.GID),
@@ -687,13 +682,12 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
- if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
- return fmt.Errorf("configuring container FS: %v", err)
- }
+ // Set up the child container file system.
+ l.startGoferMonitor(cid, goferFDs)
- if err := setExecutablePath(ctx, &procArgs); err != nil {
- return fmt.Errorf("setting executable path for %+v: %v", procArgs, err)
+ mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
+ if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil {
+ return err
}
// Create and start the new process.
@@ -710,17 +704,59 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
return nil
}
+// startGoferMonitor runs a goroutine to monitor the gofer's health. It polls
+// the gofer FDs for disconnects and destroys the container if any of them
+// disconnects.
+func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+ go func() {
+ log.Debugf("Monitoring gofer health for container %q", cid)
+ var events []unix.PollFd
+ for _, fd := range goferFDs {
+ events = append(events, unix.PollFd{
+ Fd: int32(fd),
+ Events: unix.POLLHUP | unix.POLLRDHUP,
+ })
+ }
+ _, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
+ // Use ppoll instead of poll because it's already whitelisted in seccomp.
+ n, err := unix.Ppoll(events, nil, nil)
+ return uintptr(n), 0, err
+ })
+ if err != nil {
+ panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+ }
+
+ // Check if the gofer has stopped as part of normal container destruction.
+ // This is done just to avoid sending an annoying error message to the log.
+ // Note that there is a small race window between mu.Unlock() and the
+ // lock being reacquired in destroyContainer(), but it's harmless to call
+ // destroyContainer() multiple times.
+ l.mu.Lock()
+ _, ok := l.processes[execID{cid: cid}]
+ l.mu.Unlock()
+ if ok {
+ log.Infof("Gofer socket disconnected, destroying container %q", cid)
+ if err := l.destroyContainer(cid); err != nil {
+ log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+ }
+ }
+ }()
+}
+
// destroyContainer stops a container if it is still running and cleans up its
// filesystem.
func (l *Loader) destroyContainer(cid string) error {
l.mu.Lock()
defer l.mu.Unlock()
- // Has the container started?
- _, _, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ if err != nil {
+ // Container doesn't exist.
+ return err
+ }
- // If the container has started, kill and wait for all processes.
- if err == nil {
+ // The container exists; has it been started?
+ if started {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
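The monitor introduced above hinges on two details: specutils.RetryEintr retries the wrapped syscall while it fails with EINTR, and a peer closing its end of the connection surfaces as POLLHUP/POLLRDHUP. A standalone sketch of the disconnect detection, illustrative only, using a socketpair in place of the sentry/gofer connection:

    package main

    import (
        "fmt"

        "golang.org/x/sys/unix"
    )

    func main() {
        // Create a connected pair of sockets to stand in for the
        // sentry/gofer connection, then close one end to simulate the
        // gofer dying.
        fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
        if err != nil {
            panic(err)
        }
        unix.Close(fds[1]) // the "gofer" exits

        events := []unix.PollFd{{
            Fd:     int32(fds[0]),
            Events: unix.POLLHUP | unix.POLLRDHUP,
        }}
        // Ppoll with a nil timeout blocks until an event fires; the
        // peer's close shows up as POLLHUP/POLLRDHUP on our end.
        if _, err := unix.Ppoll(events, nil, nil); err != nil {
            panic(err)
        }
        fmt.Printf("revents: %#x\n", events[0].Revents)
    }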
@@ -754,27 +790,22 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, _, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+ tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
if err != nil {
- return 0, fmt.Errorf("no such container: %q", args.ContainerID)
+ return 0, err
+ }
+ if !started {
+ return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
- // Get the container Root Dirent and MountNamespace from the Task.
+ // Get the container MountNamespace from the Task.
tg.Leader().WithMuLocked(func(t *kernel.Task) {
- // FSContext.RootDirectory() will take an extra ref for us.
- args.Root = t.FSContext().RootDirectory()
-
// task.MountNamespace() does not take a ref, so we must do so
// ourselves.
args.MountNamespace = t.MountNamespace()
args.MountNamespace.IncRef()
})
- defer func() {
- if args.Root != nil {
- args.Root.DecRef()
- }
- args.MountNamespace.DecRef()
- }()
+ defer args.MountNamespace.DecRef()
// Start the process.
proc := control.Proc{Kernel: l.k}
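The comment above spells out the refcount contract that drives this change: task.MountNamespace() hands back a pointer without taking a reference, so the caller must IncRef while still under the task lock and DecRef when finished. A generic sketch of that borrow pattern, with hypothetical types:

    // refCounted is a hypothetical stand-in for gVisor's refcounted objects.
    type refCounted interface {
        IncRef()
        DecRef()
    }

    // borrow pins obj for the duration of use; the accessor that produced
    // obj did not take a reference on the caller's behalf.
    func borrow(obj refCounted, use func(refCounted)) {
        obj.IncRef()
        defer obj.DecRef()
        use(obj)
    }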
@@ -895,6 +926,8 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
}
+ s.FillDefaultIPTables()
+
return &s, nil
default:
@@ -1026,20 +1059,28 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
l.mu.Lock()
defer l.mu.Unlock()
- return l.threadGroupFromIDLocked(key)
+ tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+ if err != nil {
+ return nil, nil, err
+ }
+ if !ok {
+ return nil, nil, fmt.Errorf("container %q not started", key.cid)
+ }
+ return tg, tty, nil
}
// threadGroupFromIDLocked returns the thread group and TTY for the given
// execution ID. TTY may be nil if the process is not attached to a terminal.
-// Returns error if execution ID is invalid or if container/process has not
-// started yet. Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// Also returns a boolean indicating whether the container has already started.
+// Returns an error if the execution ID is invalid or if the container cannot
+// be found (maybe it has been deleted). Caller must hold 'mu'.
+func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
ep := l.processes[key]
if ep == nil {
- return nil, nil, fmt.Errorf("container not found")
+ return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
}
if ep.tg == nil {
- return nil, nil, fmt.Errorf("container not started")
+ return nil, nil, false, nil
}
- return ep.tg, ep.tty, nil
+ return ep.tg, ep.tty, true, nil
}
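With the extra boolean, callers can now tell "container not found" apart from "container not started". A sketch of the resulting caller pattern (hypothetical method, mirroring destroyContainer and executeAsync above):

    // withStartedContainer is a hypothetical caller, holding l.mu.
    func (l *Loader) withStartedContainer(cid string) error {
        tg, tty, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
        if err != nil {
            return err // unknown ID: never created, or already deleted
        }
        if !started {
            return nil // known container, but no thread group to act on yet
        }
        _ = tg  // safe to use once started
        _ = tty // may still be nil if not attached to a terminal
        return nil
    }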