summary | refs | log | tree | commit | diff | homepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r-- runsc/boot/controller.go 19
-rw-r--r-- runsc/boot/fs.go 159
-rw-r--r-- runsc/boot/loader.go 36
3 files changed, 150 insertions, 64 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index fc6ea326a..69e88d8e0 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -17,6 +17,7 @@ package boot
import (
"errors"
"fmt"
+ "path"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.googlesource.com/gvisor/pkg/control/server"
@@ -181,11 +182,15 @@ type StartArgs struct {
// CID is the ID of the container to start.
CID string
+
+ // FilePayload contains the file descriptor over which the sandbox will
+ // request files from its root filesystem.
+ urpc.FilePayload
}
// Start runs a created container within a sandbox.
func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
- log.Debugf("containerManager.Start")
+ log.Debugf("containerManager.Start: %+v", args)
// Validate arguments.
if args == nil {
@@ -200,8 +205,18 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
if args.CID == "" {
return errors.New("start argument missing container ID")
}
+ // Prevent CIDs containing ".." from confusing the sentry when creating the
+ // /__runsc_containers__/<cid> directory.
+ // TODO: Once we have multiple independent roots, this
+ // check won't be necessary.
+ if path.Clean(args.CID) != args.CID {
+ return fmt.Errorf("container ID shouldn't contain directory traversals such as \"..\": %q", args.CID)
+ }
+ if len(args.FilePayload.Files) != 1 {
+ return fmt.Errorf("start arguments must contain one file for the container root")
+ }
- tgid, err := cm.l.startContainer(args, cm.l.k)
+ tgid, err := cm.l.startContainer(cm.l.k, args.Spec, args.Conf, args.CID, args.FilePayload.Files[0])
if err != nil {
return err
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index eea2ec1f5..8996b1398 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -48,6 +48,19 @@ const (
// Device name for root mount.
rootDevice = "9pfs-/"
+
+ // childContainersDir is the directory where child container root
+ // filesystems are mounted.
+ childContainersDir = "/__runsc_containers__"
+
+ // Filesystems that runsc supports.
+ bind = "bind"
+ devpts = "devpts"
+ devtmpfs = "devtmpfs"
+ proc = "proc"
+ sysfs = "sysfs"
+ tmpfs = "tmpfs"
+ nonefs = "none"
)
type fdDispenser struct {
@@ -70,8 +83,15 @@ func (f *fdDispenser) empty() bool {
// createMountNamespace creates a mount namespace containing the root filesystem
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, ioFDs []int) (*fs.MountNamespace, error) {
+ mounts := compileMounts(spec)
+ // Create a tmpfs mount where we create and mount a root filesystem for
+ // each child container.
+ mounts = append(mounts, specs.Mount{
+ Type: tmpfs,
+ Destination: childContainersDir,
+ })
fds := &fdDispenser{fds: ioFDs}
- rootInode, err := createRootMount(rootCtx, spec, conf, fds)
+ rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts)
if err != nil {
return nil, fmt.Errorf("failed to create root mount: %v", err)
}
@@ -79,7 +99,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
if err != nil {
return nil, fmt.Errorf("failed to create root mount namespace: %v", err)
}
- mounts := compileMounts(spec)
+
if err := setMounts(rootCtx, conf, mns, fds, mounts); err != nil {
return nil, fmt.Errorf("failed to configure mounts: %v", err)
}
@@ -98,12 +118,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// Always mount /dev.
mounts = append(mounts, specs.Mount{
- Type: "devtmpfs",
+ Type: devtmpfs,
Destination: "/dev",
})
mounts = append(mounts, specs.Mount{
- Type: "devpts",
+ Type: devpts,
Destination: "/dev/pts",
})
@@ -129,13 +149,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
var mandatoryMounts []specs.Mount
if !procMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "proc",
+ Type: proc,
Destination: "/proc",
})
}
if !sysMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "sysfs",
+ Type: sysfs,
Destination: "/sys",
})
}
@@ -149,7 +169,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// that. Until then, the /tmp mount will always appear empty at
// container creation.
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: "tmpfs",
+ Type: tmpfs,
Destination: "/tmp",
})
}
@@ -165,7 +185,7 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// mount namespace.
func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error {
for _, m := range mounts {
- if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil {
+ if err := mountSubmount(ctx, conf, mns, fds, m, mounts, m.Destination); err != nil {
return err
}
}
@@ -173,7 +193,7 @@ func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *f
}
// createRootMount creates the root filesystem.
-func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser) (*fs.Inode, error) {
+func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *fdDispenser, mounts []specs.Mount) (*fs.Inode, error) {
// First construct the filesystem from the spec.Root.
mf := fs.MountSourceFlags{ReadOnly: spec.Root.Readonly}
@@ -207,7 +227,7 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f
// We need to overlay the root on top of a ramfs with stub directories
// for submount paths. "/dev" "/sys" "/proc" and "/tmp" are always
// mounted even if they are not in the spec.
- submounts := append(subtargets("/", spec.Mounts), "/dev", "/sys", "/proc", "/tmp")
+ submounts := append(subtargets("/", mounts), "/dev", "/sys", "/proc", "/tmp")
rootInode, err = addSubmountOverlay(ctx, rootInode, submounts)
if err != nil {
return nil, fmt.Errorf("error adding submount overlay: %v", err)
@@ -256,17 +276,17 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
)
switch m.Type {
- case "devpts", "devtmpfs", "proc", "sysfs":
+ case devpts, devtmpfs, proc, sysfs:
fsName = m.Type
- case "none":
- fsName = "sysfs"
- case "tmpfs":
+ case nonefs:
+ fsName = sysfs
+ case tmpfs:
fsName = m.Type
// tmpfs has some extra supported options that we must pass through.
opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
- case "bind":
+ case bind:
switch conf.FileAccess {
case FileAccessProxy, FileAccessProxyExclusive:
fd := fds.remove()
@@ -291,7 +311,7 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
return fsName, opts, useOverlay, err
}
-func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
+func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount, dest string) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds)
@@ -342,51 +362,52 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd
// in the right location, e.g.
// mount: /var/run/secrets, may be created in '/run/secrets' if
// '/var/run' => '/var'.
- if err := mkdirAll(ctx, mns, m.Destination); err != nil {
+ if err := mkdirAll(ctx, mns, dest); err != nil {
return err
}
root := mns.Root()
defer root.DecRef()
- dirent, err := mns.FindInode(ctx, root, nil, m.Destination, linux.MaxSymlinkTraversals)
+ dirent, err := mns.FindInode(ctx, root, nil, dest, linux.MaxSymlinkTraversals)
if err != nil {
- return fmt.Errorf("failed to find mount destination %q: %v", m.Destination, err)
+ return fmt.Errorf("failed to find mount destination %q: %v", dest, err)
}
defer dirent.DecRef()
if err := mns.Mount(ctx, dirent, inode); err != nil {
- return fmt.Errorf("failed to mount at destination %q: %v", m.Destination, err)
+ return fmt.Errorf("failed to mount at destination %q: %v", dest, err)
}
- log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
+ log.Infof("Mounted %q to %q type %s", m.Source, dest, m.Type)
return nil
}
func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error {
+ log.Infof("mkdirAll called with path %s", path)
root := mns.Root()
defer root.DecRef()
// Starting at the root, walk the path.
parent := root
ps := strings.Split(filepath.Clean(path), string(filepath.Separator))
- for i := 0; i < len(ps); i++ {
- if ps[i] == "" {
+ for _, pathElem := range ps {
+ if pathElem == "" {
// This will be the case for the first and last element, if the path
// begins or ends with '/'. Note that we always treat the path as
// absolute, regardless of what the first character contains.
continue
}
- d, err := mns.FindInode(ctx, root, parent, ps[i], fs.DefaultTraversalLimit)
+ d, err := mns.FindInode(ctx, root, parent, pathElem, fs.DefaultTraversalLimit)
if err == syserror.ENOENT {
// If we encounter a path that does not exist, then
// create it.
- if err := parent.CreateDirectory(ctx, root, ps[i], fs.FilePermsFromMode(0755)); err != nil {
- return fmt.Errorf("failed to create directory %q: %v", ps[i], err)
+ if err := parent.CreateDirectory(ctx, root, pathElem, fs.FilePermsFromMode(0755)); err != nil {
+ return fmt.Errorf("failed to create directory %q: %v", pathElem, err)
}
- if d, err = parent.Walk(ctx, root, ps[i]); err != nil {
- return fmt.Errorf("walk to %q failed: %v", ps[i], err)
+ if d, err = parent.Walk(ctx, root, pathElem); err != nil {
+ return fmt.Errorf("walk to %q failed: %v", pathElem, err)
}
} else if err != nil {
- return fmt.Errorf("failed to find inode %q: %v", ps[i], err)
+ return fmt.Errorf("failed to find inode %q: %v", pathElem, err)
}
parent = d
}
@@ -444,7 +465,7 @@ func destinations(mounts []specs.Mount, extra ...string) []string {
// mountDevice returns a device string based on the fs type and target
// of the mount.
func mountDevice(m specs.Mount) string {
- if m.Type == "bind" {
+ if m.Type == bind {
// Make a device string that includes the target, which is consistent across
// S/R and uniquely identifies the connection.
return "9pfs-" + m.Destination
@@ -589,7 +610,7 @@ func subtargets(root string, mnts []specs.Mount) []string {
// setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly.
// procArgs are passed by reference and the FDMap field is modified.
-func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel) error {
+func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error {
ctx := procArgs.NewContext(k)
// Create the FD map, which will set stdin, stdout, and stderr. If
@@ -604,27 +625,79 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe
// won't need ours either way.
procArgs.FDMap = fdm
+ // Use root user to configure mounts. The current user might not have
+ // permission to do so.
+ rootProcArgs := kernel.CreateProcessArgs{
+ WorkingDirectory: "/",
+ Credentials: auth.NewRootCredentials(creds.UserNamespace),
+ Umask: 0022,
+ MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ }
+ rootCtx := rootProcArgs.NewContext(k)
+
// If this is the root container, we also need to set up the root mount
// namespace.
- if k.RootMountNamespace() == nil {
- // Use root user to configure mounts. The current user might not have
- // permission to do so.
- rootProcArgs := kernel.CreateProcessArgs{
- WorkingDirectory: "/",
- Credentials: auth.NewRootCredentials(creds.UserNamespace),
- Umask: 0022,
- MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
- }
- rootCtx := rootProcArgs.NewContext(k)
-
+ mns := k.RootMountNamespace()
+ if mns == nil {
// Create the virtual filesystem.
mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
if err != nil {
return fmt.Errorf("error creating mounts: %v", err)
}
-
k.SetRootMountNamespace(mns)
+ return nil
+ }
+
+ // Set up a child container.
+
+ // Create the container's root filesystem mount.
+ log.Infof("Creating new process in child container.")
+ fds := &fdDispenser{fds: append([]int{}, ioFDs...)}
+ rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil)
+ if err != nil {
+ return fmt.Errorf("error creating filesystem for container: %v", err)
+ }
+
+ // Make directories for submounts within the container.
+ rootDir := mns.Root()
+ defer rootDir.DecRef()
+ containerRoot := filepath.Join(childContainersDir, cid)
+ mkdirAll(ctx, mns, containerRoot)
+
+ // Mount the container's root filesystem to the newly created
+ // mount point.
+ containerRootDirent, err := mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals)
+ if err != nil {
+ return fmt.Errorf("failed to find mount destination: %q: %v", containerRoot, err)
+ }
+ if err := mns.Mount(ctx, containerRootDirent, rootInode); err != nil {
+ return fmt.Errorf("failed to mount at destination %q: %v", containerRoot, err)
+ }
+ containerRootDirent.DecRef()
+
+ // We have to re-walk to the dirent to find the mounted
+ // directory. The old dirent is invalid at this point.
+ containerRootDirent, err = mns.FindInode(ctx, rootDir, nil, containerRoot, linux.MaxSymlinkTraversals)
+ if err != nil {
+ return fmt.Errorf("failed to find mount destination2: %q: %v", containerRoot, err)
+ }
+ log.Infof("Mounted child's root fs to %q", containerRoot)
+
+ // Mount all submounts.
+ mounts := compileMounts(spec)
+ for _, m := range mounts {
+ // TODO: Enable bind mounts in child containers.
+ if m.Type == bind {
+ log.Infof("Bind mounts in child containers are not yet supported: %+v", m)
+ continue
+ }
+ dest := filepath.Join(containerRoot, m.Destination)
+ if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), fds, m, mounts, dest); err != nil {
+ return fmt.Errorf("error mounting filesystem for container: %v", err)
+ }
}
+ // Set the procArgs root directory.
+ procArgs.Root = containerRootDirent
return nil
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f6c7bf223..7debf0ac2 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"math/rand"
+ "os"
"runtime"
"sync"
"sync/atomic"
@@ -229,7 +230,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console
// Ensure that signals received are forwarded to the emulated kernel.
stopSignalForwarding := sighandling.PrepareForwarding(k, false)()
- procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+ procArgs, err := newProcess(spec, creds, utsns, ipcns, k)
if err != nil {
return nil, fmt.Errorf("failed to create root process: %v", err)
}
@@ -250,7 +251,7 @@ func New(spec *specs.Spec, conf *Config, controllerFD int, ioFDs []int, console
}
// newProcess creates a process that can be run with kernel.CreateProcess.
-func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+func newProcess(spec *specs.Spec, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
// Create initial limits.
ls, err := createLimitSet(spec)
if err != nil {
@@ -277,7 +278,6 @@ func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds
UTSNamespace: utsns,
IPCNamespace: ipcns,
}
-
return procArgs, nil
}
@@ -356,7 +356,8 @@ func (l *Loader) run() error {
l.console,
l.rootProcArgs.Credentials,
l.rootProcArgs.Limits,
- l.k)
+ l.k,
+ "" /* CID, which isn't needed for the root container */)
if err != nil {
return err
}
@@ -376,8 +377,7 @@ func (l *Loader) run() error {
// startContainer starts a child container. It returns the thread group ID of
// the newly created process.
-func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.ThreadID, error) {
- spec := args.Spec
+func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config, cid string, file *os.File) (kernel.ThreadID, error) {
// Create capabilities.
caps, err := specutils.Capabilities(spec.Process.Capabilities)
if err != nil {
@@ -406,26 +406,24 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
// when indicated by the spec.
procArgs, err := newProcess(
- args.Spec,
- args.Conf,
- nil, // ioFDs
- false, // console
+ spec,
creds,
- k.RootUTSNamespace(),
- k.RootIPCNamespace(),
- k)
+ l.k.RootUTSNamespace(),
+ l.k.RootIPCNamespace(),
+ l.k)
if err != nil {
return 0, fmt.Errorf("failed to create new process: %v", err)
}
err = setFileSystemForProcess(
&procArgs,
- args.Spec,
- args.Conf,
- nil,
+ spec,
+ conf,
+ []int{int(file.Fd())}, // ioFDs
false,
creds,
procArgs.Limits,
- k)
+ k,
+ cid)
if err != nil {
return 0, fmt.Errorf("failed to create new process: %v", err)
}
@@ -435,7 +433,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
return 0, fmt.Errorf("failed to create process in sentry: %v", err)
}
- ts := k.TaskSet()
+ ts := l.k.TaskSet()
tgid := ts.Root.IDOfThreadGroup(tg)
if tgid == 0 {
return 0, errors.New("failed to get thread group ID of new process")
@@ -446,7 +444,7 @@ func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) (kernel.Threa
l.mu.Lock()
defer l.mu.Unlock()
- l.containerRootTGIDs[args.CID] = tgid
+ l.containerRootTGIDs[cid] = tgid
return tgid, nil
}