summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/BUILD1
-rw-r--r--runsc/boot/config.go18
-rw-r--r--runsc/boot/controller.go12
-rw-r--r--runsc/boot/filter/config.go2
-rw-r--r--runsc/boot/fs.go182
-rw-r--r--runsc/boot/loader.go125
-rw-r--r--runsc/boot/loader_test.go19
-rw-r--r--runsc/boot/network.go42
-rw-r--r--runsc/boot/user_test.go12
9 files changed, 231 insertions, 182 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 5025401dd..588bb8851 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -88,6 +88,7 @@ go_library(
"//runsc/specutils",
"@com_github_golang_protobuf//proto:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 6a742f349..05b8f8761 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -113,7 +113,7 @@ func MakeWatchdogAction(s string) (watchdog.Action, error) {
}
}
-// MakeRefsLeakMode converts type from string
+// MakeRefsLeakMode converts type from string.
func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
switch strings.ToLower(s) {
case "disabled":
@@ -127,6 +127,20 @@ func MakeRefsLeakMode(s string) (refs.LeakMode, error) {
}
}
+func refsLeakModeToString(mode refs.LeakMode) string {
+ switch mode {
+ // If not set, default it to disabled.
+ case refs.UninitializedLeakChecking, refs.NoLeakChecking:
+ return "disabled"
+ case refs.LeaksLogWarning:
+ return "log-names"
+ case refs.LeaksLogTraces:
+ return "log-traces"
+ default:
+ panic(fmt.Sprintf("Invalid leakmode: %d", mode))
+ }
+}
+
// Config holds configuration that is not part of the runtime spec.
type Config struct {
// RootDir is the runtime root directory.
@@ -245,7 +259,7 @@ func (c *Config) ToFlags() []string {
"--num-network-channels=" + strconv.Itoa(c.NumNetworkChannels),
"--rootless=" + strconv.FormatBool(c.Rootless),
"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
- "--ref-leak-mode=" + c.ReferenceLeakMode.String(),
+ "--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
}
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
// Only include if set since it is never to be used by users.
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index d79aaff60..72cbabd16 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -328,10 +328,8 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("at most two files may be passed to Restore")
}
- networkStack := cm.l.k.NetworkStack()
- // Destroy the old kernel and create a new kernel.
+ // Pause the kernel while we build a new one.
cm.l.k.Pause()
- cm.l.k.Destroy()
p, err := createPlatform(cm.l.conf, deviceFile)
if err != nil {
@@ -345,10 +343,11 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
return fmt.Errorf("creating memory file: %v", err)
}
k.SetMemoryFile(mf)
+ networkStack := cm.l.k.NetworkStack()
cm.l.k = k
// Set up the restore environment.
- mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
+ mntr := newContainerMounter(cm.l.spec, cm.l.goferFDs, cm.l.k, cm.l.mountHints)
renv, err := mntr.createRestoreEnvironment(cm.l.conf)
if err != nil {
return fmt.Errorf("creating RestoreEnvironment: %v", err)
@@ -380,13 +379,10 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Load the state.
loadOpts := state.LoadOpts{Source: specFile}
- if err := loadOpts.Load(k, networkStack); err != nil {
+ if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil {
return err
}
- // Set timekeeper.
- k.Timekeeper().SetClocks(time.NewCalibratedClocks())
-
// Since we have a new kernel we also must make a new watchdog.
dog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 0ee5b8bbd..7ca776b3a 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -207,7 +207,7 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_MPROTECT: {},
syscall.SYS_MUNMAP: {},
syscall.SYS_NANOSLEEP: {},
- syscall.SYS_POLL: {},
+ syscall.SYS_PPOLL: {},
syscall.SYS_PREAD64: {},
syscall.SYS_PWRITE64: {},
syscall.SYS_READ: {},
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 7e95e1f41..34c674840 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -25,10 +25,8 @@ import (
// Include filesystem types that OCI spec might mount.
_ "gvisor.dev/gvisor/pkg/sentry/fs/dev"
- "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
_ "gvisor.dev/gvisor/pkg/sentry/fs/host"
_ "gvisor.dev/gvisor/pkg/sentry/fs/proc"
- "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
_ "gvisor.dev/gvisor/pkg/sentry/fs/sys"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
_ "gvisor.dev/gvisor/pkg/sentry/fs/tty"
@@ -38,6 +36,8 @@ import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/gofer"
+ "gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
@@ -263,6 +263,18 @@ func subtargets(root string, mnts []specs.Mount) []string {
return targets
}
+func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+ mns, err := mntr.setupFS(conf, procArgs)
+ if err != nil {
+ return err
+ }
+
+ // Set namespace here so that it can be found in ctx.
+ procArgs.MountNamespace = mns
+
+ return setExecutablePath(ctx, procArgs)
+}
+
// setExecutablePath sets the procArgs.Filename by searching the PATH for an
// executable matching the procArgs.Argv[0].
func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
@@ -478,9 +490,6 @@ func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
}
type containerMounter struct {
- // cid is the container ID. May be set to empty for the root container.
- cid string
-
root *specs.Root
// mounts is the set of submounts for the container. It's a copy from the spec
@@ -495,9 +504,8 @@ type containerMounter struct {
hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
return &containerMounter{
- cid: cid,
root: spec.Root,
mounts: compileMounts(spec),
fds: fdDispenser{fds: goferFDs},
@@ -506,63 +514,88 @@ func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel
}
}
-// setupFS is used to set up the file system for containers and amend
-// the procArgs accordingly. This is the main entry point for this rest of
-// functions in this file. procArgs are passed by reference and the FDMap field
-// is modified. It dups stdioFDs.
-func (c *containerMounter) setupFS(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs, creds *auth.Credentials) error {
- // Use root user to configure mounts. The current user might not have
- // permission to do so.
- rootProcArgs := kernel.CreateProcessArgs{
- WorkingDirectory: "/",
- Credentials: auth.NewRootCredentials(creds.UserNamespace),
- Umask: 0022,
- MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
- PIDNamespace: procArgs.PIDNamespace,
+// processHints processes annotations that container hints about how volumes
+// should be mounted (e.g. a volume shared between containers). It must be
+// called for the root container only.
+func (c *containerMounter) processHints(conf *Config) error {
+ ctx := c.k.SupervisorContext()
+ for _, hint := range c.hints.mounts {
+ log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
+ inode, err := c.mountSharedMaster(ctx, conf, hint)
+ if err != nil {
+ return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
+ }
+ hint.root = inode
}
+ return nil
+}
+
+// setupFS is used to set up the file system for all containers. This is the
+// main entry point method, with most of the other being internal only. It
+// returns the mount namespace that is created for the container.
+func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) {
+ log.Infof("Configuring container's file system")
+
+ // Create context with root credentials to mount the filesystem (the current
+ // user may not be privileged enough).
+ rootProcArgs := *procArgs
+ rootProcArgs.WorkingDirectory = "/"
+ rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+ rootProcArgs.Umask = 0022
+ rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
rootCtx := rootProcArgs.NewContext(c.k)
- // If this is the root container, we also need to setup the root mount
- // namespace.
- rootMNS := c.k.RootMountNamespace()
- if rootMNS == nil {
- // Setup the root container.
- if err := c.setupRootContainer(ctx, rootCtx, conf, func(rootMNS *fs.MountNamespace) {
- // The callback to setupRootContainer inherits a
- // reference on the rootMNS, so we don't need to take
- // an additional reference here.
- procArgs.MountNamespace = rootMNS
- procArgs.Root = rootMNS.Root()
- c.k.SetRootMountNamespace(rootMNS)
- }); err != nil {
- return err
- }
- return c.checkDispenser()
+ mns, err := c.createMountNamespace(rootCtx, conf)
+ if err != nil {
+ return nil, err
}
- // Setup a child container.
- log.Infof("Creating new process in child container.")
+ // Set namespace here so that it can be found in rootCtx.
+ rootProcArgs.MountNamespace = mns
- // Create a new root inode and mount namespace for the container.
- rootInode, err := c.createRootMount(rootCtx, conf)
+ if err := c.mountSubmounts(rootCtx, conf, mns); err != nil {
+ return nil, err
+ }
+ return mns, nil
+}
+
+func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) {
+ rootInode, err := c.createRootMount(ctx, conf)
if err != nil {
- return fmt.Errorf("creating filesystem for container: %v", err)
+ return nil, fmt.Errorf("creating filesystem for container: %v", err)
}
- mns, err := fs.NewMountNamespace(rootCtx, rootInode)
+ mns, err := fs.NewMountNamespace(ctx, rootInode)
if err != nil {
- return fmt.Errorf("creating new mount namespace for container: %v", err)
+ return nil, fmt.Errorf("creating new mount namespace for container: %v", err)
}
+ return mns, nil
+}
- // Set process root here, so 'rootCtx.Value(CtxRoot)' will return it.
- // This will also donate a reference to procArgs, as required.
- procArgs.MountNamespace = mns
- procArgs.Root = mns.Root()
+func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error {
+ root := mns.Root()
+ defer root.DecRef()
- // Mount all submounts.
- if err := c.mountSubmounts(rootCtx, conf, mns, procArgs.Root); err != nil {
+ for _, m := range c.mounts {
+ log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options)
+ if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
+ if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
+ return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
+ }
+ } else {
+ if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
+ return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ }
+ }
+ }
+
+ if err := c.mountTmp(ctx, conf, mns, root); err != nil {
+ return fmt.Errorf("mount submount %q: %v", "tmp", err)
+ }
+
+ if err := c.checkDispenser(); err != nil {
return err
}
- return c.checkDispenser()
+ return nil
}
func (c *containerMounter) checkDispenser() error {
@@ -572,36 +605,6 @@ func (c *containerMounter) checkDispenser() error {
return nil
}
-// setupRootContainer creates a mount namespace containing the root filesystem
-// and all mounts. 'rootCtx' is used to walk directories to find mount points.
-// The 'setMountNS' callback is called after the mount namespace is created and
-// will get a reference on that namespace. The callback must ensure that the
-// rootCtx has the provided mount namespace.
-func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error {
- for _, hint := range c.hints.mounts {
- log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
- inode, err := c.mountSharedMaster(rootCtx, conf, hint)
- if err != nil {
- return fmt.Errorf("mounting shared master %q: %v", hint.name, err)
- }
- hint.root = inode
- }
-
- rootInode, err := c.createRootMount(rootCtx, conf)
- if err != nil {
- return fmt.Errorf("creating root mount: %v", err)
- }
- mns, err := fs.NewMountNamespace(userCtx, rootInode)
- if err != nil {
- return fmt.Errorf("creating root mount namespace: %v", err)
- }
- setMountNS(mns)
-
- root := mns.Root()
- defer root.DecRef()
- return c.mountSubmounts(rootCtx, conf, mns, root)
-}
-
// mountSharedMaster mounts the master of a volume that is shared among
// containers in a pod. It returns the root mount's inode.
func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *Config, hint *mountHint) (*fs.Inode, error) {
@@ -717,25 +720,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
return fsName, opts, useOverlay, err
}
-func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error {
- for _, m := range c.mounts {
- if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
- if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
- return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err)
- }
- } else {
- if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil {
- return fmt.Errorf("mount submount %q: %v", m.Destination, err)
- }
- }
- }
-
- if err := c.mountTmp(ctx, conf, mns, root); err != nil {
- return fmt.Errorf("mount submount %q: %v", "tmp", err)
- }
- return nil
-}
-
// mountSubmount mounts volumes inside the container's root. Because mounts may
// be readonly, a lower ramfs overlay is added to create the mount point dir.
// Another overlay is added with tmpfs on top if Config.Overlay is true.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 65ac67dbf..19b738705 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,12 +27,12 @@ import (
gtime "time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/memutil"
"gvisor.dev/gvisor/pkg/rand"
- "gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -181,9 +181,6 @@ type Args struct {
// New initializes a new kernel loader configured by spec.
// New also handles setting up a kernel for restoring a container.
func New(args Args) (*Loader, error) {
- // Sets the reference leak check mode
- refs.SetLeakMode(args.Conf.ReferenceLeakMode)
-
// We initialize the rand package now to make sure /dev/urandom is pre-opened
// on kernels that do not support getrandom(2).
if err := rand.Init(); err != nil {
@@ -527,15 +524,14 @@ func (l *Loader) run() error {
// ours either way.
l.rootProcArgs.FDTable = fdTable
- // cid for root container can be empty. Only subcontainers need it to set
- // the mount location.
- mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
- if err := mntr.setupFS(ctx, l.conf, &l.rootProcArgs, l.rootProcArgs.Credentials); err != nil {
+ // Setup the root container file system.
+ l.startGoferMonitor(l.sandboxID, l.goferFDs)
+
+ mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
+ if err := mntr.processHints(l.conf); err != nil {
return err
}
-
- rootCtx := l.rootProcArgs.NewContext(l.k)
- if err := setExecutablePath(rootCtx, &l.rootProcArgs); err != nil {
+ if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil {
return err
}
@@ -549,7 +545,7 @@ func (l *Loader) run() error {
}
}
if !hasHomeEnvv {
- homeDir, err := getExecUserHome(rootCtx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
+ homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
if err != nil {
return fmt.Errorf("error reading exec user: %v", err)
}
@@ -631,7 +627,6 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
// sentry currently supports only 1 mount namespace, which is tied to a
// single user namespace. Thus we must run in the same user namespace
// to access mounts.
- // TODO(b/63601033): Create a new mount namespace for the container.
creds := auth.NewUserCredentials(
auth.KUID(spec.Process.User.UID),
auth.KGID(spec.Process.User.GID),
@@ -687,13 +682,12 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
goferFDs = append(goferFDs, fd)
}
- mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
- if err := mntr.setupFS(ctx, conf, &procArgs, creds); err != nil {
- return fmt.Errorf("configuring container FS: %v", err)
- }
+ // Setup the child container file system.
+ l.startGoferMonitor(cid, goferFDs)
- if err := setExecutablePath(ctx, &procArgs); err != nil {
- return fmt.Errorf("setting executable path for %+v: %v", procArgs, err)
+ mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
+ if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil {
+ return err
}
// Create and start the new process.
@@ -710,17 +704,59 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
return nil
}
+// startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
+// the gofer FDs looking for disconnects, and destroys the container if a
+// disconnect occurs in any of the gofer FDs.
+func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+ go func() {
+ log.Debugf("Monitoring gofer health for container %q", cid)
+ var events []unix.PollFd
+ for _, fd := range goferFDs {
+ events = append(events, unix.PollFd{
+ Fd: int32(fd),
+ Events: unix.POLLHUP | unix.POLLRDHUP,
+ })
+ }
+ _, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
+ // Use ppoll instead of poll because it's already whilelisted in seccomp.
+ n, err := unix.Ppoll(events, nil, nil)
+ return uintptr(n), 0, err
+ })
+ if err != nil {
+ panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+ }
+
+ // Check if the gofer has stopped as part of normal container destruction.
+ // This is done just to avoid sending an annoying error message to the log.
+ // Note that there is a small race window in between mu.Unlock() and the
+ // lock being reacquired in destroyContainer(), but it's harmless to call
+ // destroyContainer() multiple times.
+ l.mu.Lock()
+ _, ok := l.processes[execID{cid: cid}]
+ l.mu.Unlock()
+ if ok {
+ log.Infof("Gofer socket disconnected, destroying container %q", cid)
+ if err := l.destroyContainer(cid); err != nil {
+ log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+ }
+ }
+ }()
+}
+
// destroyContainer stops a container if it is still running and cleans up its
// filesystem.
func (l *Loader) destroyContainer(cid string) error {
l.mu.Lock()
defer l.mu.Unlock()
- // Has the container started?
- _, _, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ _, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+ if err != nil {
+ // Container doesn't exist.
+ return err
+ }
- // If the container has started, kill and wait for all processes.
- if err == nil {
+ // The container exists, has it been started?
+ if started {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
@@ -754,27 +790,22 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
l.mu.Lock()
defer l.mu.Unlock()
- tg, _, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+ tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
if err != nil {
- return 0, fmt.Errorf("no such container: %q", args.ContainerID)
+ return 0, err
+ }
+ if !started {
+ return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
- // Get the container Root Dirent and MountNamespace from the Task.
+ // Get the container MountNamespace from the Task.
tg.Leader().WithMuLocked(func(t *kernel.Task) {
- // FSContext.RootDirectory() will take an extra ref for us.
- args.Root = t.FSContext().RootDirectory()
-
// task.MountNamespace() does not take a ref, so we must do so
// ourselves.
args.MountNamespace = t.MountNamespace()
args.MountNamespace.IncRef()
})
- defer func() {
- if args.Root != nil {
- args.Root.DecRef()
- }
- args.MountNamespace.DecRef()
- }()
+ defer args.MountNamespace.DecRef()
// Start the process.
proc := control.Proc{Kernel: l.k}
@@ -895,6 +926,8 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
}
+ s.FillDefaultIPTables()
+
return &s, nil
default:
@@ -1026,20 +1059,28 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
l.mu.Lock()
defer l.mu.Unlock()
- return l.threadGroupFromIDLocked(key)
+ tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+ if err != nil {
+ return nil, nil, err
+ }
+ if !ok {
+ return nil, nil, fmt.Errorf("container %q not started", key.cid)
+ }
+ return tg, tty, nil
}
// threadGroupFromIDLocked returns the thread group and TTY for the given
// execution ID. TTY may be nil if the process is not attached to a terminal.
-// Returns error if execution ID is invalid or if container/process has not
-// started yet. Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// Also returns a boolean indicating whether the container has already started.
+// Returns error if execution ID is invalid or if the container cannot be
+// found (maybe it has been deleted). Caller must hold 'mu'.
+func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
ep := l.processes[key]
if ep == nil {
- return nil, nil, fmt.Errorf("container not found")
+ return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
}
if ep.tg == nil {
- return nil, nil, fmt.Errorf("container not started")
+ return nil, nil, false, nil
}
- return ep.tg, ep.tty, nil
+ return ep.tg, ep.tty, true, nil
}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index ff713660d..147ff7703 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -401,17 +401,16 @@ func TestCreateMountNamespace(t *testing.T) {
}
defer cleanup()
- // setupRootContainer needs to find root from the context after the
- // namespace is created.
- var mns *fs.MountNamespace
- setMountNS := func(m *fs.MountNamespace) {
- mns = m
- ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+ mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
+ mns, err := mntr.createMountNamespace(ctx, conf)
+ if err != nil {
+ t.Fatalf("failed to create mount namespace: %v", err)
}
- mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
- if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
- t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
+ ctx = fs.WithRoot(ctx, mns.Root())
+ if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
+ t.Fatalf("failed to create mount namespace: %v", err)
}
+
root := mns.Root()
defer root.DecRef()
for _, p := range tc.expectedPaths {
@@ -614,7 +613,7 @@ func TestRestoreEnvironment(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
conf := testConfig()
- mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
+ mntr := newContainerMounter(tc.spec, tc.ioFDs, nil, &podMountHints{})
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index d3d98243d..ea0d9f790 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -38,8 +38,7 @@ type Network struct {
// Route represents a route in the network stack.
type Route struct {
- Destination net.IP
- Mask net.IPMask
+ Destination net.IPNet
Gateway net.IP
}
@@ -85,16 +84,19 @@ type CreateLinksAndRoutesArgs struct {
// Empty returns true if route hasn't been set.
func (r *Route) Empty() bool {
- return r.Destination == nil && r.Mask == nil && r.Gateway == nil
+ return r.Destination.IP == nil && r.Destination.Mask == nil && r.Gateway == nil
}
-func (r *Route) toTcpipRoute(id tcpip.NICID) tcpip.Route {
+func (r *Route) toTcpipRoute(id tcpip.NICID) (tcpip.Route, error) {
+ subnet, err := tcpip.NewSubnet(ipToAddress(r.Destination.IP), ipMaskToAddressMask(r.Destination.Mask))
+ if err != nil {
+ return tcpip.Route{}, err
+ }
return tcpip.Route{
- Destination: ipToAddress(r.Destination),
+ Destination: subnet,
Gateway: ipToAddress(r.Gateway),
- Mask: ipToAddressMask(net.IP(r.Mask)),
NIC: id,
- }
+ }, nil
}
// CreateLinksAndRoutes creates links and routes in a network stack. It should
@@ -128,7 +130,11 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
// Collect the routes from this link.
for _, r := range link.Routes {
- routes = append(routes, r.toTcpipRoute(nicID))
+ route, err := r.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
}
}
@@ -170,7 +176,11 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
// Collect the routes from this link.
for _, r := range link.Routes {
- routes = append(routes, r.toTcpipRoute(nicID))
+ route, err := r.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
}
}
@@ -179,7 +189,11 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
if !ok {
return fmt.Errorf("invalid interface name %q for default route", args.DefaultGateway.Name)
}
- routes = append(routes, args.DefaultGateway.Route.toTcpipRoute(nicID))
+ route, err := args.DefaultGateway.Route.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
}
log.Infof("Setting routes %+v", routes)
@@ -230,8 +244,8 @@ func ipToAddress(ip net.IP) tcpip.Address {
return addr
}
-// ipToAddressMask converts IP to tcpip.AddressMask, ignoring the protocol.
-func ipToAddressMask(ip net.IP) tcpip.AddressMask {
- _, addr := ipToAddressAndProto(ip)
- return tcpip.AddressMask(addr)
+// ipMaskToAddressMask converts IPMask to tcpip.AddressMask, ignoring the
+// protocol.
+func ipMaskToAddressMask(ipMask net.IPMask) tcpip.AddressMask {
+ return tcpip.AddressMask(ipToAddress(net.IP(ipMask)))
}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 834003430..906baf3e5 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -164,13 +164,13 @@ func TestGetExecUserHome(t *testing.T) {
},
}
- var mns *fs.MountNamespace
- setMountNS := func(m *fs.MountNamespace) {
- mns = m
- ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+ mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
+ mns, err := mntr.createMountNamespace(ctx, conf)
+ if err != nil {
+ t.Fatalf("failed to create mount namespace: %v", err)
}
- mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
- if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
+ ctx = fs.WithRoot(ctx, mns.Root())
+ if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
t.Fatalf("failed to create mount namespace: %v", err)
}